# Loading data
data <- read.csv("Documents/food_access_research_atlas.csv")
summary(data)
## CensusTract State County Urban
## Min. :1.00e+09 Length:72864 Length:72864 Min. :0.000
## 1st Qu.:1.21e+10 Class :character Class :character 1st Qu.:1.000
## Median :2.71e+10 Mode :character Mode :character Median :1.000
## Mean :2.78e+10 Mean :0.757
## 3rd Qu.:4.10e+10 3rd Qu.:1.000
## Max. :5.60e+10 Max. :1.000
## POP2010 OHU2010 GroupQuartersFlag NUMGQTRS
## Min. : 0 Min. : 0 Min. :0.00000 Min. : 0
## 1st Qu.: 2884 1st Qu.: 1102 1st Qu.:0.00000 1st Qu.: 0
## Median : 4002 Median : 1521 Median :0.00000 Median : 7
## Mean : 4237 Mean : 1602 Mean :0.00708 Mean : 110
## 3rd Qu.: 5323 3rd Qu.: 2018 3rd Qu.:0.00000 3rd Qu.: 63
## Max. :37452 Max. :16043 Max. :1.00000 Max. :19496
## PCTGQTRS LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
## Min. :0.00000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.00178 Median :0.000 Median :0.000 Median :0.000
## Mean :0.02696 Mean :0.127 Mean :0.282 Mean :0.111
## 3rd Qu.:0.01552 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :1.00000 Max. :1.000 Max. :1.000 Max. :1.000
## LILATracts_Vehicle HUNVFlag LowIncomeTracts PovertyRate
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0.0
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 7.0
## Median :0.000 Median :0.000 Median :0.000 Median : 13.2
## Mean :0.149 Mean :0.224 Mean :0.424 Mean : 16.6
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.: 22.7
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :100.0
## MedianFamilyIncome LA1and10 LAhalfand10 LA1and20
## Min. : 0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.: 44837 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median : 60893 Median :0.000 Median :1.000 Median :0.000
## Mean : 67432 Mean :0.378 Mean :0.682 Mean :0.339
## 3rd Qu.: 82763 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :250000 Max. :1.000 Max. :1.000 Max. :1.000
## LATracts_half LATracts1 LATracts10 LATracts20
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00000
## Median :1.000 Median :0.000 Median :0.000 Median :0.00000
## Mean :0.638 Mean :0.334 Mean :0.044 Mean :0.00532
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.00000
## LATractsVehicle_20 LAPOP1_10 LAPOP05_10 LAPOP1_20
## Min. :0.000 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:0.000 1st Qu.: 0 1st Qu.: 62 1st Qu.: 0
## Median :0.000 Median : 53 Median : 1743 Median : 0
## Mean :0.228 Mean : 951 Mean : 2126 Mean : 888
## 3rd Qu.:0.000 3rd Qu.: 1358 3rd Qu.: 3412 3rd Qu.: 1193
## Max. :1.000 Max. :28170 Max. :32469 Max. :28170
## LALOWI1_10 LALOWI05_10 LALOWI1_20 lapophalf
## Min. : 0.0 Min. : 0.0 Min. : 0 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 17.7 1st Qu.: 0 1st Qu.: 1460
## Median : 13.1 Median : 429.9 Median : 0 Median : 2769
## Mean : 277.5 Mean : 683.2 Mean : 253 Mean : 2965
## 3rd Qu.: 331.6 3rd Qu.: 1030.9 3rd Qu.: 269 3rd Qu.: 4184
## Max. :11183.0 Max. :11297.1 Max. :11183 Max. :37452
## lapophalfshare lalowihalf lalowihalfshare lakidshalf
## Min. :0.000 Min. : 0 Min. :0.0000 Min. : 0
## 1st Qu.:0.474 1st Qu.: 332 1st Qu.:0.0903 1st Qu.: 298
## Median :0.805 Median : 768 Median :0.2017 Median : 628
## Mean :0.687 Mean : 958 Mean :0.2306 Mean : 722
## 3rd Qu.:0.994 3rd Qu.: 1360 3rd Qu.:0.3403 3rd Qu.:1015
## Max. :1.000 Max. :17866 Max. :1.0000 Max. :9918
## lakidshalfshare laseniorshalf laseniorshalfshare lawhitehalf
## Min. :0.0000 Min. : 0 Min. :0.0000 Min. : 0
## 1st Qu.:0.0944 1st Qu.: 154 1st Qu.:0.0425 1st Qu.: 806
## Median :0.1782 Median : 346 Median :0.0915 Median : 2043
## Mean :0.1626 Mean : 395 Mean :0.0964 Mean : 2274
## 3rd Qu.:0.2311 3rd Qu.: 567 3rd Qu.:0.1375 3rd Qu.: 3373
## Max. :0.9081 Max. :15260 Max. :1.0000 Max. :28477
## lawhitehalfshare lablackhalf lablackhalfshare laasianhalf
## Min. :0.000 Min. : 0.0 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.239 1st Qu.: 15.3 1st Qu.:0.00415 1st Qu.: 7.0
## Median :0.572 Median : 70.0 Median :0.01731 Median : 27.9
## Mean :0.525 Mean : 334.5 Mean :0.08356 Mean : 108.8
## 3rd Qu.:0.816 3rd Qu.: 319.0 3rd Qu.:0.07713 3rd Qu.: 95.9
## Max. :1.000 Max. :14365.5 Max. :1.00000 Max. :6964.2
## laasianhalfshare lanhopihalf lanhopihalfshare laaianhalf
## Min. :0.00000 Min. : 0.00 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.00212 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 3.0
## Median :0.00713 Median : 0.31 Median :0.00007 Median : 9.0
## Mean :0.02265 Mean : 4.53 Mean :0.00099 Mean : 29.6
## 3rd Qu.:0.02181 3rd Qu.: 2.83 3rd Qu.:0.00060 3rd Qu.: 20.7
## Max. :1.00000 Max. :2786.31 Max. :0.85882 Max. :9009.0
## laaianhalfshare laomultirhalf laomultirhalfshare lahisphalf
## Min. :0.00000 Min. : 0 Min. :0.0000 Min. : 0.0
## 1st Qu.:0.00078 1st Qu.: 41 1st Qu.:0.0122 1st Qu.: 40.2
## Median :0.00224 Median : 101 Median :0.0254 Median : 122.5
## Mean :0.00760 Mean : 213 Mean :0.0470 Mean : 380.3
## 3rd Qu.:0.00488 3rd Qu.: 241 3rd Qu.:0.0561 3rd Qu.: 368.8
## Max. :1.00000 Max. :6587 Max. :1.0000 Max. :13394.3
## lahisphalfshare lahunvhalf lahunvhalfshare lasnaphalf
## Min. :0.0000 Min. : 0.0 Min. :0.0000 Min. : 0.0
## 1st Qu.:0.0120 1st Qu.: 14.4 1st Qu.:0.0103 1st Qu.: 27.2
## Median :0.0309 Median : 42.9 Median :0.0281 Median : 91.5
## Mean :0.0817 Mean : 67.2 Mean :0.0454 Mean : 134.6
## 3rd Qu.:0.0872 3rd Qu.: 91.7 3rd Qu.:0.0573 3rd Qu.: 195.9
## Max. :1.0000 Max. :1934.9 Max. :1.0000 Max. :1691.4
## lasnaphalfshare lapop1 lapop1share lalowi1
## Min. :0.0000 Min. : 0 Min. :0.000 Min. : 0
## 1st Qu.:0.0191 1st Qu.: 0 1st Qu.:0.000 1st Qu.: 0
## Median :0.0613 Median : 957 Median :0.255 Median : 210
## Mean :0.0885 Mean : 1696 Mean :0.391 Mean : 519
## 3rd Qu.:0.1272 3rd Qu.: 2909 3rd Qu.:0.800 3rd Qu.: 807
## Max. :1.0000 Max. :37002 Max. :1.000 Max. :17651
## lalowi1share lakids1 lakids1share laseniors1
## Min. :0.0000 Min. : 0 Min. :0.0000 Min. : 0
## 1st Qu.:0.0000 1st Qu.: 0 1st Qu.:0.0000 1st Qu.: 0
## Median :0.0525 Median : 204 Median :0.0545 Median : 111
## Mean :0.1240 Mean : 414 Mean :0.0926 Mean : 231
## 3rd Qu.:0.2082 3rd Qu.: 679 3rd Qu.:0.1807 3rd Qu.: 391
## Max. :1.0000 Max. :8881 Max. :0.9081 Max. :10226
## laseniors1share lawhite1 lawhite1share lablack1
## Min. :0.0000 Min. : 0 Min. :0.000 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.: 0 1st Qu.:0.000 1st Qu.: 0.0
## Median :0.0276 Median : 624 Median :0.165 Median : 10.3
## Mean :0.0564 Mean : 1383 Mean :0.319 Mean : 155.0
## 3rd Qu.:0.0989 3rd Qu.: 2376 3rd Qu.:0.627 3rd Qu.: 79.1
## Max. :1.0000 Max. :28124 Max. :1.000 Max. :12111.7
## lablack1share laasian1 laasian1share lanhopi1
## Min. :0.00000 Min. : 0.0 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.:0.00000 1st Qu.: 0.00
## Median :0.00278 Median : 5.2 Median :0.00149 Median : 0.00
## Mean :0.03661 Mean : 42.7 Mean :0.00861 Mean : 2.16
## 3rd Qu.:0.01834 3rd Qu.: 28.0 3rd Qu.:0.00661 3rd Qu.: 1.00
## Max. :1.00000 Max. :5809.0 Max. :1.00000 Max. :2163.63
## lanhopi1share laaian1 laaian1share laomultir1
## Min. :0.00000 Min. : 0.0 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.:0.00000 1st Qu.: 0.0
## Median :0.00000 Median : 2.0 Median :0.00048 Median : 26.6
## Mean :0.00047 Mean : 20.1 Mean :0.00534 Mean : 93.2
## 3rd Qu.:0.00018 3rd Qu.: 10.2 3rd Qu.:0.00246 3rd Qu.: 94.1
## Max. :0.85882 Max. :9009.0 Max. :1.00000 Max. :6146.0
## laomultir1share lahisp1 lahisp1share lahunv1
## Min. :0.0000 Min. : 0.0 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 0.0 1st Qu.:0.00000 1st Qu.: 0.00
## Median :0.0074 Median : 27.4 Median :0.00753 Median : 8.21
## Mean :0.0205 Mean : 161.6 Mean :0.03440 Mean : 29.03
## 3rd Qu.:0.0221 3rd Qu.: 117.8 3rd Qu.:0.02758 3rd Qu.: 40.10
## Max. :1.0000 Max. :12403.0 Max. :1.00000 Max. :1934.92
## lahunv1share lasnap1 lasnap1share lapop10
## Min. :0.0000 Min. : 0.0 Min. :0.0000 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.: 0.0 1st Qu.:0.0000 1st Qu.: 0.0
## Median :0.0055 Median : 20.1 Median :0.0132 Median : 0.0
## Mean :0.0192 Mean : 71.5 Mean :0.0460 Mean : 72.2
## 3rd Qu.:0.0254 3rd Qu.: 103.1 3rd Qu.:0.0673 3rd Qu.: 0.0
## Max. :1.0000 Max. :1521.2 Max. :1.0000 Max. :8926.3
## lapop10share lalowi10 lalowi10share lakids10
## Min. :0.0000 Min. : 0.0 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.: 0.0 1st Qu.:0.00000 1st Qu.: 0.0
## Median :0.0000 Median : 0.0 Median :0.00000 Median : 0.0
## Mean :0.0237 Mean : 28.9 Mean :0.00939 Mean : 16.6
## 3rd Qu.:0.0000 3rd Qu.: 0.0 3rd Qu.:0.00000 3rd Qu.: 0.0
## Max. :1.0000 Max. :6013.0 Max. :1.00000 Max. :3522.6
## lakids10share laseniors10 laseniors10share lawhite10
## Min. :0.00000 Min. : 0.0 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.:0.00000 1st Qu.: 0.0
## Median :0.00000 Median : 0.0 Median :0.00000 Median : 0.0
## Mean :0.00537 Mean : 12.1 Mean :0.00413 Mean : 59.2
## 3rd Qu.:0.00000 3rd Qu.: 0.0 3rd Qu.:0.00000 3rd Qu.: 0.0
## Max. :0.40220 Max. :2531.0 Max. :0.57680 Max. :5485.0
## lawhite10share lablack10 lablack10share laasian10
## Min. :0.0000 Min. : 0.0 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 0.0 1st Qu.:0.00000 1st Qu.: 0.00
## Median :0.0000 Median : 0.0 Median :0.00000 Median : 0.00
## Mean :0.0196 Mean : 4.7 Mean :0.00139 Mean : 0.32
## 3rd Qu.:0.0000 3rd Qu.: 0.0 3rd Qu.:0.00000 3rd Qu.: 0.00
## Max. :1.0000 Max. :4320.5 Max. :0.89282 Max. :830.24
## laasian10share lanhopi10 lanhopi10share laaian10
## Min. :0.000000 Min. : 0.00 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.000000 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 0.0
## Median :0.000000 Median : 0.00 Median :0.00000 Median : 0.0
## Mean :0.000101 Mean : 0.07 Mean :0.00003 Mean : 4.6
## 3rd Qu.:0.000000 3rd Qu.: 0.00 3rd Qu.:0.00000 3rd Qu.: 0.0
## Max. :0.282215 Max. :940.03 Max. :0.85882 Max. :8481.7
## laaian10share laomultir10 laomultir10share lahisp10
## Min. :0.0000 Min. : 0.00 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 0.0
## Median :0.0000 Median : 0.00 Median :0.00000 Median : 0.0
## Mean :0.0015 Mean : 3.29 Mean :0.00106 Mean : 5.4
## 3rd Qu.:0.0000 3rd Qu.: 0.00 3rd Qu.:0.00000 3rd Qu.: 0.0
## Max. :0.9934 Max. :1720.00 Max. :0.45455 Max. :4749.0
## lahisp10share lahunv10 lahunv10share lasnap10
## Min. :0.00000 Min. : 0.00 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 0.00 1st Qu.:0.0000 1st Qu.: 0.00
## Median :0.00000 Median : 0.00 Median :0.0000 Median : 0.00
## Mean :0.00171 Mean : 1.44 Mean :0.0013 Mean : 3.71
## 3rd Qu.:0.00000 3rd Qu.: 0.00 3rd Qu.:0.0000 3rd Qu.: 0.00
## Max. :0.91609 Max. :1833.73 Max. :0.8889 Max. :1108.69
## lasnap10share lapop20 lapop20share lalowi20
## Min. :0.00000 Min. : 0.0 Min. :0.0000 Min. : 0
## 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.:0.0000 1st Qu.: 0
## Median :0.00000 Median : 0.0 Median :0.0000 Median : 0
## Mean :0.00312 Mean : 8.7 Mean :0.0037 Mean : 4
## 3rd Qu.:0.00000 3rd Qu.: 0.0 3rd Qu.:0.0000 3rd Qu.: 0
## Max. :0.88889 Max. :8850.0 Max. :1.0000 Max. :4463
## lalowi20share lakids20 lakids20share laseniors20
## Min. :0.00000 Min. : 0.00 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 0.00
## Median :0.00000 Median : 0.00 Median :0.00000 Median : 0.00
## Mean :0.00161 Mean : 2.13 Mean :0.00086 Mean : 1.49
## 3rd Qu.:0.00000 3rd Qu.: 0.00 3rd Qu.:0.00000 3rd Qu.: 0.00
## Max. :1.00000 Max. :2992.00 Max. :0.39454 Max. :2081.13
## laseniors20share lawhite20 lawhite20share lablack20
## Min. :0.00000 Min. : 0.0 Min. :0.00000 Min. : 0.0
## 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.:0.00000 1st Qu.: 0.0
## Median :0.00000 Median : 0.0 Median :0.00000 Median : 0.0
## Mean :0.00066 Mean : 5.9 Mean :0.00271 Mean : 0.1
## 3rd Qu.:0.00000 3rd Qu.: 0.0 3rd Qu.:0.00000 3rd Qu.: 0.0
## Max. :0.47428 Max. :5485.0 Max. :0.99163 Max. :1086.0
## lablack20share laasian20 laasian20share lanhopi20
## Min. :0.000000 Min. : 0.00 Min. :0.0e+00 Min. : 0.00
## 1st Qu.:0.000000 1st Qu.: 0.00 1st Qu.:0.0e+00 1st Qu.: 0.00
## Median :0.000000 Median : 0.00 Median :0.0e+00 Median : 0.00
## Mean :0.000033 Mean : 0.05 Mean :2.2e-05 Mean : 0.01
## 3rd Qu.:0.000000 3rd Qu.: 0.00 3rd Qu.:0.0e+00 3rd Qu.: 0.00
## Max. :0.204064 Max. :785.00 Max. :2.5e-01 Max. :146.00
## lanhopi20share laaian20 laaian20share laomultir20
## Min. :0.00000 Min. : 0.0 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.:0.0000 1st Qu.: 0.00
## Median :0.00000 Median : 0.0 Median :0.0000 Median : 0.00
## Mean :0.00002 Mean : 2.1 Mean :0.0007 Mean : 0.52
## 3rd Qu.:0.00000 3rd Qu.: 0.0 3rd Qu.:0.0000 3rd Qu.: 0.00
## Max. :0.85882 Max. :6355.2 Max. :0.9934 Max. :1654.00
## laomultir20share lahisp20 lahisp20share lahunv20
## Min. :0.00000 Min. : 0 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 0 1st Qu.:0.00000 1st Qu.: 0.00
## Median :0.00000 Median : 0 Median :0.00000 Median : 0.00
## Mean :0.00022 Mean : 1 Mean :0.00038 Mean : 0.28
## 3rd Qu.:0.00000 3rd Qu.: 0 3rd Qu.:0.00000 3rd Qu.: 0.00
## Max. :0.45448 Max. :4749 Max. :0.91609 Max. :1430.08
## lahunv20share lasnap20 lasnap20share TractLOWI
## Min. :0.00000 Min. : 0.00 Min. :0.00000 Min. : 0
## 1st Qu.:0.00000 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.: 721
## Median :0.00000 Median : 0.00 Median :0.00000 Median : 1230
## Mean :0.00032 Mean : 0.48 Mean :0.00052 Mean : 1451
## 3rd Qu.:0.00000 3rd Qu.: 0.00 3rd Qu.:0.00000 3rd Qu.: 1936
## Max. :0.68458 Max. :827.72 Max. :0.60455 Max. :13234
## TractKids TractSeniors TractWhite TractBlack
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 607 1st Qu.: 318 1st Qu.: 1832 1st Qu.: 42
## Median : 921 Median : 495 Median : 2903 Median : 158
## Mean : 1018 Mean : 553 Mean : 3068 Mean : 534
## 3rd Qu.: 1310 3rd Qu.: 716 3rd Qu.: 4111 3rd Qu.: 606
## Max. :11845 Max. :17271 Max. :28983 Max. :16804
## TractAsian TractNHOPI TractAIAN TractOMultir
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 17 1st Qu.: 0.0 1st Qu.: 7.0 1st Qu.: 83
## Median : 57 Median : 1.0 Median : 15.0 Median : 184
## Mean : 201 Mean : 7.4 Mean : 40.2 Mean : 386
## 3rd Qu.: 188 3rd Qu.: 5.0 3rd Qu.: 33.0 3rd Qu.: 446
## Max. :10485 Max. :3491.0 Max. :9009.0 Max. :8839
## TractHispanic TractHUNV TractSNAP
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 87 1st Qu.: 37 1st Qu.: 69
## Median : 240 Median : 84 Median : 158
## Mean : 693 Mean : 145 Mean : 207
## 3rd Qu.: 746 3rd Qu.: 172 3rd Qu.: 293
## Max. :15420 Max. :6176 Max. :2152
1. What do we know about this dataset?
This information on supermarket availability at different distances was taken from the Food Access Research Atlas. This data gives a rich detailed summary because it measures access by the Census-Tract. Data on food access was linked with information on age, race, location (rural or urban), and income.
2. What are the limitations of the dataset?
The dataset lacks variables to monitor the median household income and vehicle accessibility across different ethnic groups. It is crucial to note that the term “vehicle access” in this context does not necessarily imply the absence of vehicles at a residence; rather, individuals are deemed to have access to a vehicle if public transportation is readily available to them. Additionally, there are no variables present to explicitly denote households without any vehicles.
3. How was the information gathered?
Beside knowing the dataset requirements for Project 1. The idea of Food Desert came from one of our team members that did some volunteer work on a number of community project in past.This data is pulled from the Food Access Research Atlas, and contains information on supermarket access at various distances. This data measures access by the Census-Tract, and as such provides a fairly granular overview.
4. What analysis has already been completed related to the content in your dataset?
Yes, there has been an analysis that is related to the content in our dataset, and it was an article, “What Are Food Deserts and Why Do They Exits? (FFAC Staff, 2022)” According to the report, many people find it difficult to picture their lives without access to wholesome food. It is an everyday occurrence for many. Not that we don’t produce enough food, but rather the fact that millions of people, especially those who reside in “food deserts,” lack access to it, is the issue.
5. How did the research you gathered contribute to your question development?
The data in the article, “What Are Food Deserts and Why Do They Exits?,” had some of the variables, such as census tract and areas that are low-income and low-access in our food desert dataset.
6. What additional information would be beneficial?
According to another article, “Food Research & Action Center Calls for WIC Funding, SNAP Benefit Adequacy as Rates of Hunger Rise” (FFAC Staff, 2023), from Food Research & Action Center (FRAC), stated today by the U.S. Department of Agriculture’s Economic Research Service (ERS), that the COVID-19 pandemic relief efforts caused hunger in America to decline the previous year, but it surged in 2022. So additional information on how covid affected food desert status would be beneficial
7. How did your question change, if at all, after Exploratory Data Analysis? still writing-up data from EDA Upon examining the query, “What percentage of food deserts are classified as low-income tracts?”, we discovered that food deserts inherently embody a convergence of low income and limited access. So we ignored it.
plot_map <- function(dataset, colx, region, subx) {
grpddata <- dataset %>%
filter(.data[[colx]] == 1) %>% # Filtering the food desert flags
group_by(State, County) %>% # Grouping by State and County
reframe(full = State, count = sum(.data[[colx]])) # Reframing to dataframe for Plotting
grpddata$county <-
paste(grpddata$County, "County") # Adding " County" text to support prerequisite of the plot_usmap() counties
grpddata <-
distinct(grpddata, .keep_all = TRUE) %>% select(full, county, count) # Ignoring the duplicates and selecting particular columns to supply the plot_usmap()
counties_df <-
us_map("counties") %>% select(fips, full, county) %>% distinct() # Getting the FIPS data of the counties. Mandatory for plotting in maps. Available from the us_map() dataframe built-in with the plot_usmap() library
merged_df <-
grpddata %>% right_join(counties_df, by = c("county", "full")) # Right joining the dataframes to get relevant FIPS code
if (region == "State") {
merged_df <-
merged_df %>% select(-fips) %>% rename(state = full) # Changing the structure for State wise plotting
}
state_map <-
plot_usmap("states", color = "#ff000030", size = 0.01)
counties_map <- plot_usmap(
data = merged_df,
values = "count",
color = "black",
size = 0.1
)
# Merging both the plots using ggplot and rendering the combined map using geom_polygon()
ggplot() +
geom_polygon(
data = counties_map[[1]],
aes(
x = x,
y = y,
group = group,
fill = counties_map[[1]]$count,
),
color = "black",
size = 0.1
) +
geom_polygon(
data = state_map[[1]],
aes(x = x,
y = y,
group = group),
color = "#ff000030",
fill = alpha(0.01)
) +
coord_equal() +
theme(
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
strip.text = element_blank(),
plot.subtitle = element_text(size = rel(0.8)),
panel.background = element_rect(fill = "white"),
) +
scale_fill_gradient(low = 'white', high = 'grey20') +
labs(
title = paste(region, "- Wise Geographic Distribution of Food Desert in US"),
subtitle = subx,
fill = "Prevelance of FD"
)
}
plot_map(data, colx = "LILATracts_halfAnd10", region="County", subx="LI and LA half and 10 miles for Urban and Rural")
plot_map(data, colx = "LILATracts_1And10", region="County", subx="LI and LA 1 and 10 miles for Urban and Rural")
plot_map(data, colx = "LILATracts_1And20", region="County", subx="LI and LA 1 and 20 miles for Urban and Rural")
plot_map(data, colx = "LILATracts_halfAnd10", region="State", subx="LI and LA half and 10 miles for Urban and Rural")
plot_map(data, colx = "LILATracts_1And10", region="State", subx="LI and LA 1 and 10 miles for Urban and Rural")
plot_map(data, colx = "LILATracts_1And20", region="State", subx="LI and LA 1 and 20 miles for Urban and Rural")
unique_states <- data %>% group_by(State) %>% distinct(State) # Grouping by State and using distinct() to get unique states
unique_counties <- data %>% group_by(State, County) %>% distinct(County)
# Grouping by both State and County and then
# using distinct() to get unique states. Because directly Grouping the counties will result in lesser results as different
# counties in different States can exists with the same name
total_census_tracts <- nrow(data)
unique_states <- data %>% group_by(State) %>% distinct(State) # Grouping by State and using distinct() to get unique states
unique_counties <- data %>% group_by(State, County) %>% distinct(County)
# Grouping by both State and County and then
# using distinct() to get unique states. Because directly Grouping the counties will result in lesser results as different
# counties in different States can exists with the same name
total_count <- nrow(data)
urban_count <- nrow(data %>% filter(Urban == 1))
rural_count <- nrow(data %>% filter(Urban == 0))
urban_food_desert_count <- nrow(data %>% filter(Urban == 1, LILATracts_1And10 == 1))
rural_food_desert_count <- nrow(data %>% filter(Urban == 0, LILATracts_1And10 == 1))
urban_percentage <- urban_count / total_count * 100
rural_percentage <- rural_count / total_count * 100
urban_food_desert_percentage <- urban_food_desert_count / urban_count * 100
rural_food_desert_percentage <- rural_food_desert_count / rural_count * 100
overall_percentage <- (urban_food_desert_count + rural_food_desert_count) / total_count * 100
cat("There are", urban_count, "Urban Counties\n")
## There are 55172 Urban Counties
cat("There are", rural_count, "Rural Counties\n\n")
## There are 17692 Rural Counties
cat("There are", urban_food_desert_count, "Urban Food Deserts\n")
## There are 7905 Urban Food Deserts
cat("There are", rural_food_desert_count, "Rural Food Deserts\n\n")
## There are 1340 Rural Food Deserts
cat("The Urban tracts constitute", round(urban_percentage, 2), "% of the total Census tracts\n")
## The Urban tracts constitute 75.72 % of the total Census tracts
cat("The Rural tracts constitute", round(rural_percentage, 2), "% of the total Census tracts\n\n")
## The Rural tracts constitute 24.28 % of the total Census tracts
cat("Based on the dataset, it is found that", round(urban_food_desert_percentage, 2), "% of the Urban tracts are food deserts\n")
## Based on the dataset, it is found that 14.33 % of the Urban tracts are food deserts
cat("And", round(rural_food_desert_percentage, 2), "% of the Rural tracts are food deserts\n\n")
## And 7.57 % of the Rural tracts are food deserts
cat("Overall,", round(overall_percentage, 2), "% are food deserts based on the analysis made over the dataset\n")
## Overall, 12.69 % are food deserts based on the analysis made over the dataset
cat("From the analysis, it is evident that Food Deserts are more common in the Urban Areas, due to many factors like Population, Poverty Rates, Low Access and Low Income tracts")
## From the analysis, it is evident that Food Deserts are more common in the Urban Areas, due to many factors like Population, Poverty Rates, Low Access and Low Income tracts
Group Quarters : Places where people live or stay in a
group. Living arrangement that is owned or managed by an entity or
organization providing housing and/or services for the residents.
data$GroupQuartersFlag <- as.factor(data$GroupQuartersFlag)
data$LILATracts_1And10 <- as.factor(data$LILATracts_1And10)
GroupQuarters_LILA <- data[ (data$GroupQuartersFlag == 1 & data$LILATracts_1And10 == 1), c("GroupQuartersFlag", "LILATracts_1And10")]
NonGroupQuarters_LILA <- data[data$GroupQuartersFlag == 0 & data$LILATracts_1And10 == 1, c("GroupQuartersFlag", "LILATracts_1And10")]
data1 = data.frame()
Percentage_GroupQuarters_LILA = nrow(GroupQuarters_LILA)/(sum(nrow(GroupQuarters_LILA)+ nrow(NonGroupQuarters_LILA)))*100
Percentage_NonGroupQuarters_LILA = nrow(NonGroupQuarters_LILA)/(sum(nrow(GroupQuarters_LILA)+ nrow(NonGroupQuarters_LILA)))*100
data1 <- rbind(data1, Percentage_GroupQuarters_LILA)
data1 <- rbind(data1, Percentage_NonGroupQuarters_LILA)
GroupQuartersFlag = c(1,0)
data1 <- cbind(data1, GroupQuartersFlag)
colnames(data1) <- c('Percentage', 'GroupQuartersFlag')
library(ggplot2)
pie_chart <- ggplot(data1, aes(x = "", y = Percentage, fill = factor(GroupQuartersFlag))) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y") +
labs(title = "Percentage of Group quarters in Food desert regions",
fill = "GroupQuartersFlag") +
scale_fill_manual(values = c("0" = "grey", "1" = "black"), labels = c("0", "1")) +
theme_minimal() +
theme(legend.position = "bottom")
print(pie_chart)
Analysis : The proportion of group quarters is lower
across all food deserts. Additionally, when compared to the entire
Groupquarters,a smaller fraction of Groupquarters are located in food
deserted areas.
data$State <- as.factor(data$State)
bar_chart <- ggplot(data, aes(x = State, y = PCTGQTRS)) +
geom_bar(stat = "identity", fill = "lightblue") +
labs(title = "Percentage of Group Quarters by State",
x = "State",
y = "Percentage of Group Quarters") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.2))
print(bar_chart)
Analysis:
California and New York have more group quarters tracts.
Wyoming and Delaware are states with least group quarters.
California, Illinois, and New York are among the states that rank last when compared to the food deserts chart. This demonstrates that they have more stretches of group housing but fewer areas where food is deserted.
Testing using chi-square (GOF) between 2 categorical variables (GroupQuartersFlag and LILATracts_1And10):
Null Hypothesis (H0): There is no association between the two categorical variables
Alternative Hypothesis (H1): There is an association between the two categorical variables
Significance level \(\alpha\) = 0.05
contingency_table <- table(data$GroupQuartersFlag, data$LILATracts_1And10)
chi_squared_test_result <- chisq.test(contingency_table)
chi_squared_test_result
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 37.3, df = 1, p-value = 1e-09
Analysis:
Due to the extremely low p value, H0 is rejected. This shows a large discrepancy between observed and predicted frequencies for the “GroupQuartersFlag” and “LILATracts_1And10” variables, indicating that the observed data does not follow the expected distribution.
Therefore, there is a significant association or correlation between “GroupQuartersFlag” and “LILATracts_1And10.”
data$GroupQuartersFlag <- as.factor(data$GroupQuartersFlag)
data$Urban <- as.factor(data$Urban)
GroupQuarters <- data[data$GroupQuartersFlag == 1, c("GroupQuartersFlag", "Urban")]
bar_chart <- ggplot(GroupQuarters, aes(x = GroupQuartersFlag, fill = Urban)) +
geom_bar(position = "dodge") +
labs(title = "Comparison of GroupQuartersFlag and Urban Flag",
x = "Group Quarters Flag",
y = "Count") +
scale_fill_manual(values = c("1" = "lightgreen", "0" = "magenta")) +
theme_minimal()
print(bar_chart)
Analysis: Group quarters are more in urban compared to
Rural.
data2 = data.frame()
Percentage_GroupQuarters_Poverty = nrow(GroupQuarters_Poverty)/(sum(nrow(GroupQuarters_Poverty)+ nrow(NonGroupQuarters_Poverty)))*100
Percentage_NonGroupQuarters_Poverty = nrow(NonGroupQuarters_Poverty)/(sum(nrow(GroupQuarters_Poverty)+ nrow(NonGroupQuarters_Poverty)))*100
data2 <- rbind(data2, Percentage_GroupQuarters_Poverty)
data2 <- rbind(data2, Percentage_NonGroupQuarters_Poverty)
GroupQuartersFlag = c(1,0)
data2 <- cbind(data2, GroupQuartersFlag)
colnames(data2) <- c('Percentage', 'GroupQuartersFlag')
pie_chart <- ggplot(data2, aes(x = "", y = Percentage, fill = factor(GroupQuartersFlag))) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y") +
labs(title = "Percentage of Poverty in group quarters",
fill = "GroupQuartersFlag") +
scale_fill_manual(values = c("0" = "pink", "1" = "black"), labels = c("0", "1")) +
theme_minimal() +
theme(legend.position = "bottom")
print(pie_chart)
Analysis : Poverty rate is more in Non Group Quarters
tracts.
Applying T-Test on “PovertyRate” with respect to Group quarters:
Null Hypothesis (H0): The average Poverty rate are the same for with and without group quarters .
Alternative Hypothesis (H1): The average Poverty rate are different for with and without group quarters .
Significance level \(\alpha\) = 0.05
t_test_result <- t.test(data_outliers$PovertyRate ~ data_outliers$GroupQuartersFlag)
t_test_result
##
## Welch Two Sample t-test
##
## data: data_outliers$PovertyRate by data_outliers$GroupQuartersFlag
## t = 5.21, df = 368, p-value = 3.2e-07
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 2.4367 5.3935
## sample estimates:
## mean in group 0 mean in group 1
## 15.210 11.295
Analysis:
p value is less than \(\alpha\) (0.05). Thus, Null hypothesis is rejected, indicating that there is a significant difference between the means of poverty rate of Groupquarters and nongroupquarters.
Group quarters are more prevalent in Non-Food deserted regions and have a lower poverty incidence.
This begs the question of why food sellers choose these kinds of neighborhood settings for their stores. Or more specifically, are they even relocating their current stores to these affluent areas?
Consequently, “supermarket redlining” is becoming a common practice.
# Assuming `data` is the dataframe you have
data %>%
group_by(LILATracts_1And10) %>%
summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
## LILATracts_1And10 mean_poverty_rate
## <fct> <dbl>
## 1 0 15.1
## 2 1 26.5
data %>%
group_by(LILATracts_1And20) %>%
summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
## LILATracts_1And20 mean_poverty_rate
## <int> <dbl>
## 1 0 15.2
## 2 1 27.1
data %>%
group_by(LowIncomeTracts) %>%
summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
## LowIncomeTracts mean_poverty_rate
## <int> <dbl>
## 1 0 8.63
## 2 1 27.4
## Poverty rate is high in non low access areas
data %>%
group_by(LA1and10) %>%
summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
## LA1and10 mean_poverty_rate
## <int> <dbl>
## 1 0 18.0
## 2 1 14.2
data %>%
group_by(LA1and20) %>%
summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
## LA1and20 mean_poverty_rate
## <int> <dbl>
## 1 0 17.8
## 2 1 14.1
The mean poverty rate is 26.5 for food deserts and for non-food deserts it is 15.1. So the Poverty rate in food deserts is 75% higher than non-food deserts. Poverty rate in Low access area is 14.2 whereas in non-low access area is 18.0.
# Filtering data for rows where LA1and10 is 1
filtered_data <- data[data$LA1and10 == 1, ]
# Calculating value counts for LowIncomeTracts
value_counts <- table(filtered_data$LowIncomeTracts)
# Converting to data frame for ggplot
value_counts_df <- as.data.frame(value_counts)
names(value_counts_df) <- c("LowIncomeTracts", "Counts")
# Creating custom labels
value_counts_df$Labels <- ifelse(value_counts_df$LowIncomeTracts == 0, "Not Low Income",
ifelse(value_counts_df$LowIncomeTracts == 1, "Low Income", value_counts_df$LowIncomeTracts))
# Creating the bar plot
p <- ggplot(value_counts_df, aes(x = reorder(Labels, -Counts), y = Counts)) +
geom_bar(stat = "identity", fill = "skyblue", color = "black") +
labs(title = "Counts of LowIncomeTracts in low access areas", x = "LowIncomeTracts", y = "Counts") +
theme(axis.text.x = element_text(angle = 0, hjust = 1))
# Showing the plot
print(p)
#
# states_pov <- data %>%
# group_by(State) %>%
# summarise(avg_poverty_rate = mean(PovertyRate))
# #print(states_pov)
states_pov <- data %>%
group_by(State) %>%
summarise(avg_poverty_rate = mean(PovertyRate, na.rm = TRUE)) %>%
arrange(desc(avg_poverty_rate))
print(states_pov)
## # A tibble: 51 × 2
## State avg_poverty_rate
## <fct> <dbl>
## 1 Mississippi 24.6
## 2 Louisiana 21.6
## 3 Alabama 21.2
## 4 New Mexico 20.8
## 5 Georgia 20.5
## 6 Arkansas 20.4
## 7 Kentucky 20.1
## 8 District of Columbia 19.7
## 9 West Virginia 19.6
## 10 South Carolina 19.6
## # ℹ 41 more rows
print(sum(is.na(data$PovertyRate)))
## [1] 0
states_pov$norm_poverty_rate <- scale(states_pov$avg_poverty_rate)
print(states_pov)
## # A tibble: 51 × 3
## State avg_poverty_rate norm_poverty_rate[,1]
## <fct> <dbl> <dbl>
## 1 Mississippi 24.6 2.52
## 2 Louisiana 21.6 1.65
## 3 Alabama 21.2 1.53
## 4 New Mexico 20.8 1.39
## 5 Georgia 20.5 1.32
## 6 Arkansas 20.4 1.27
## 7 Kentucky 20.1 1.21
## 8 District of Columbia 19.7 1.09
## 9 West Virginia 19.6 1.06
## 10 South Carolina 19.6 1.05
## # ℹ 41 more rows
The poverty rate is normalised to find the correlation between poverty rate of states and food deserts
threshold <- quantile(states_pov$norm_poverty_rate, 0.85)
states_high_pov <- states_pov %>%
filter(norm_poverty_rate > threshold)
print(states_high_pov)
## # A tibble: 8 × 3
## State avg_poverty_rate norm_poverty_rate[,1]
## <fct> <dbl> <dbl>
## 1 Mississippi 24.6 2.52
## 2 Louisiana 21.6 1.65
## 3 Alabama 21.2 1.53
## 4 New Mexico 20.8 1.39
## 5 Georgia 20.5 1.32
## 6 Arkansas 20.4 1.27
## 7 Kentucky 20.1 1.21
## 8 District of Columbia 19.7 1.09
To address the question whether states with high poverty rate also have high number of food deserts, we are filtering only the states above 85th percentile of poverty rate.
# Calculate statewise food desert count
states_food_deserts <- data %>%
filter(LILATracts_1And10 == 1) %>%
count(State)
#print(states_food_deserts)
# Normalize food desert counts
states_food_deserts$norm_count <- scale(states_food_deserts$n)
merged_data <- inner_join(states_high_pov, states_food_deserts, by = "State")
merged_data_all <- inner_join(states_pov, states_food_deserts, by = "State")
# Get correlation
# There is no correlation between poverty rates and food desert status
# The states with high poverty don't have high number of food deserts
#print(cor(merged_data$norm_poverty_rate, merged_data$norm_count, method = "pearson"))
#### Checking correlation of poverty rate and food desert status of all the states
#### There is very less correlation between povery rate and food deserts in all states
#print(cor(states_pov$norm_poverty_rate, states_food_deserts$norm_count, method = "pearson"))
#print(merged_data)
cor.test(merged_data$norm_poverty_rate, merged_data$norm_count, method="pearson")
##
## Pearson's product-moment correlation
##
## data: merged_data$norm_poverty_rate and merged_data$norm_count
## t = 0.532, df = 6, p-value = 0.61
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.57905 0.79761
## sample estimates:
## cor
## 0.21222
#cor.test(states_pov$norm_poverty_rate, states_food_deserts$norm_count, method="pearson")
cor.test(merged_data_all$norm_poverty_rate, merged_data_all$norm_count, method="pearson")
##
## Pearson's product-moment correlation
##
## data: merged_data_all$norm_poverty_rate and merged_data_all$norm_count
## t = 3.15, df = 49, p-value = 0.0028
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.15139 0.61589
## sample estimates:
## cor
## 0.40988
Null hypothesis: There is no correlation between High poverty rate states with count of their food desert Alternate hypothesis : There is Correlation between High poverty rate states with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 0.6 is higher than the 0.05, we fail to reject the null hypothesis. The pearson correlation results tell us “There is no correlation between High poverty rate states with count of their food desert”
Null hypothesis: There is no correlation between poverty rate of all states with count of their food desert Alternate hypothesis : There is Correlation between poverty rate of all states with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 0.003 is lesser than the 0.05, we reject the null hypothesis. The pearson correlation results tell us there is statistically significant correlation between poverty rate of all states with count of their food desert.
We calculate the correlation between counties and poverty rate just like we did for the states.
counties_pov <- data %>%
group_by(County) %>%
summarise(avg_poverty_rate = mean(PovertyRate,na.rm = TRUE)) %>%
arrange(desc(avg_poverty_rate))
print(head(counties_pov))
## # A tibble: 6 × 2
## County avg_poverty_rate
## <chr> <dbl>
## 1 Radford 46.7
## 2 Wolfe 44.4
## 3 East Carroll 43.7
## 4 Corson 43.2
## 5 Hudspeth 43.2
## 6 Leflore 40.8
print(tail(counties_pov))
## # A tibble: 6 × 2
## County avg_poverty_rate
## <chr> <dbl>
## 1 Falls Church 4.07
## 2 Carver 4.04
## 3 Glasscock 3.9
## 4 Loudoun 3.85
## 5 Borden 1
## 6 Bedford City 0
counties_pov$norm_poverty_rate <- scale(counties_pov$avg_poverty_rate)
print(head(counties_pov))
## # A tibble: 6 × 3
## County avg_poverty_rate norm_poverty_rate[,1]
## <chr> <dbl> <dbl>
## 1 Radford 46.7 4.66
## 2 Wolfe 44.4 4.30
## 3 East Carroll 43.7 4.19
## 4 Corson 43.2 4.12
## 5 Hudspeth 43.2 4.11
## 6 Leflore 40.8 3.73
county_threshold <- quantile(counties_pov$norm_poverty_rate, 0.85)
counties_high_pov <- counties_pov %>%
filter(norm_poverty_rate > county_threshold)
print(head(counties_high_pov))
## # A tibble: 6 × 3
## County avg_poverty_rate norm_poverty_rate[,1]
## <chr> <dbl> <dbl>
## 1 Radford 46.7 4.66
## 2 Wolfe 44.4 4.30
## 3 East Carroll 43.7 4.19
## 4 Corson 43.2 4.12
## 5 Hudspeth 43.2 4.11
## 6 Leflore 40.8 3.73
# Calculate statewise food desert count
counties_food_deserts <- data %>%
filter(LILATracts_1And10 == 1) %>%
count(County)
#print(counties_food_deserts)
# Normalize food desert counts
counties_food_deserts$norm_count <- scale(counties_food_deserts$n)
#length(counties_food_deserts$norm_poverty_rate)
#length(counties_food_deserts$norm_count)
counties_merged_data <- inner_join(counties_high_pov, counties_food_deserts, by = "County")
# Get correlation
# There is no correlation between poverty rates and food desert status
# The states with high poverty don't have high number of food deserts
#print(cor(counties_merged_data$norm_poverty_rate, counties_merged_data$norm_count, method = "pearson"))
#### Checking correlation of poverty rate and food desert status of all the states
#### There is very less correlation between povery rate and food deserts in all states
counties_all_merged_data <- inner_join(counties_pov, counties_food_deserts, by = "County")
#print(cor(counties_all_merged_data$norm_poverty_rate, counties_all_merged_data$norm_count, method = "pearson"))
cor.test(counties_merged_data$norm_poverty_rate, counties_merged_data$norm_count, method="pearson")
##
## Pearson's product-moment correlation
##
## data: counties_merged_data$norm_poverty_rate and counties_merged_data$norm_count
## t = 0.000915, df = 234, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.12764 0.12776
## sample estimates:
## cor
## 5.9828e-05
cor.test(counties_all_merged_data$norm_poverty_rate, counties_all_merged_data$norm_count, method="pearson")
##
## Pearson's product-moment correlation
##
## data: counties_all_merged_data$norm_poverty_rate and counties_all_merged_data$norm_count
## t = 0.535, df = 1331, p-value = 0.59
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.039059 0.068301
## sample estimates:
## cor
## 0.014663
# print(sum(is.na(counties_all_merged_data$norm_poverty_rate)))
# sum(is.infinite(counties_all_merged_data$norm_poverty_rate))
# print(sum(is.na(counties_all_merged_data$norm_count)))
# sum(is.infinite(counties_all_merged_data$norm_count))
#print(counties_all_merged_data)
Null hypothesis: There is no correlation between High poverty rate counties with count of their food desert Alternate hypothesis : There is Correlation between High poverty rate counties with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 1 is higher than the 0.05, we fail to reject the null hypothesis. The correlation coefficient is cor 5.98e-05 . The pearson correlation results tell us “There is no correlation between High poverty rate counties with count of their food desert”
Null hypothesis: There is no correlation between poverty rate of all counties with count of their food desert. Alternate hypothesis : There is Correlation between poverty rate of all counties with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 0.003 is lesser than the 0.05, we reject the null hypothesis.The correlation coefficient is cor 0.0147. The pearson correlation results tell us there is no statistically significant correlation between poverty rate of all counties with count of their food desert.
data %>%
filter(LILATracts_1And10 == 1) %>%
ggplot(aes(x = lahunv1share)) +
geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
geom_density(alpha = 0.5,fill= "red", color="darkred") +
labs(title = "Percentage of total population who does not have vehicles in food desert")
data %>%
filter(LILATracts_1And10 == 0) %>%
ggplot(aes(x = lahunv1share)) +
geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
geom_density(alpha = 0.5,fill= "red", color="darkred") +
labs(title = "Percentage of total population who does not have vehicles in non-food desert")
data %>%
filter(LILATracts_1And10 == 1 & Urban == 1) %>%
ggplot(aes(x = lahunv1share)) +
geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
geom_density(alpha = 0.5,fill = "red", color="darkred") +
labs(title = "Percentage of urban population who does not have vehicles in food desert")
data %>%
filter(LILATracts_1And10 == 1 & Urban == 0) %>%
ggplot(aes(x = lahunv1share)) +
geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
geom_density(alpha = 0.5,fill = "red", color="darkred") +
labs(title = "Percentage of rural population who does not have vehicles in food desert")
data %>%
filter(LILATracts_1And10 == 0 & Urban == 1) %>%
ggplot(aes(x = lahunv1share)) +
geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
geom_density(alpha = 0.5,fill = "red", color="darkred") +
labs(title = "Percentage of urban population who does not have vehicles in non-food desert")
data %>%
filter(LILATracts_1And10 == 0 & Urban == 0) %>%
ggplot(aes(x = lahunv1share)) +
geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
geom_density(alpha = 0.5,fill = "red", color="darkred") +
labs(title = "Percentage of rural population who does not have vehicles in non-food desert")
# fd_urban <- data %>%
# filter(LILATracts_1And10 == 1 & Urban == 1) %>%
# summarise(per_without_vehicle_urban = mean(lahunv1share,na.rm = TRUE))
# print(fd_urban)
#
# fd_rural <- data %>%
# filter(LILATracts_1And10 == 1 & Urban == 0) %>%
# summarise(per_without_vehicle_rural = mean(lahunv10share,na.rm = TRUE))
# print(fd_rural)
# fd_vehicle <- data %>%
# filter(LILATracts_1And10 == 1) %>%
# summarise(per_with_vehicle = mean(lahunv10share,na.rm = TRUE))
# print(fd_vehicle)
result <- data %>%
filter(LILATracts_1And10 == 1) %>%
group_by(Urban) %>%
summarise(
lahunv1share_mean = mean(lahunv1share, na.rm = TRUE),
lahunv10share_mean = mean(lahunv10share, na.rm = TRUE)
)
print(result)
## # A tibble: 2 × 3
## Urban lahunv1share_mean lahunv10share_mean
## <fct> <dbl> <dbl>
## 1 0 0.0668 0.0408
## 2 1 0.0546 0.000162
fd_urban = subset(result, Urban == 1)["lahunv1share_mean"] * 100
fd_rural = subset(result, Urban == 0)["lahunv10share_mean"] * 100
fd_vehicle = 100 - fd_urban + fd_urban
#print(result[[2, "lahunv1share_mean"]])
#print(fd_urban)
data_pie <- data.frame(
Category = c("Urban without vehicles", "Rural without vehicles", "Population with Vehicles"),
Percentage = c(result[[2, "lahunv1share_mean"]] * 100 ,result[[1, "lahunv10share_mean"]] * 100, 100 - (result[[1, "lahunv1share_mean"]] + result[[2, "lahunv10share_mean"]])* 100)
)
#print(data_pie)
# Generate the pie chart
ggplot(data_pie, aes(x = "", y = Percentage, fill = Category)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start=0) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold")
) +
labs(fill = "") +
scale_fill_manual(values = c("skyblue", "darkred", "pink"))
Around 5% urban population in food desert doesn’t have vehicles. Around 4% rural population in food desert doesn’t have vehicles. 91% population in food desert have vehicles.
data$Urban <- as.factor(data$Urban)
levels(data$Urban) <- c("Rural", "Urban")
ggplot(data = data,
aes(x = interaction(LILATracts_1And10, Urban), y = lahunv1share)) +
geom_boxplot(fill='white', color="darkred") +
labs(title="Percentage of total population who does not have vehicles",
x ="Types of tracts", y = "Population share without vehicles") +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1))
Seems like there is a lot of outliers in the data to make any conclusions. So let’s use outlier removed data.
app_wo_gre_outlier$Urban <- as.factor(app_wo_gre_outlier$Urban)
levels(app_wo_gre_outlier$Urban) <- c("Rural", "Urban")
app_wo_gre_outlier$LILATracts_1And10 <- as.factor(app_wo_gre_outlier$LILATracts_1And10)
levels(app_wo_gre_outlier$LILATracts_1And10) <- c("Non-food desert", "Food desert")
ggplot(data = app_wo_gre_outlier,
aes(x = interaction(LILATracts_1And10, Urban), y = lahunv1share)) +
geom_boxplot(fill='white', color="darkred") +
labs(title="Percentage of total population who does not have vehicles - outliers removed",
x ="Types of tracts", y = "Population share without vehicles") +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1))
The non-food desert urban areas have the smallest median percentage of people without vehicle access. This is likely because urban areas often have alternative modes of transportation, such as public transit, walking, or biking, making vehicle ownership less necessary. Moreover, urban areas often have amenities and services within walking distance, further reducing the need for a vehicle.
In both rural and urban contexts, food deserts show a higher median percentage of people without vehicle access compared to non-food deserts. This is a significant concern because it suggests that people in food deserts, who already might have limited access to healthy food options, might also face transportation challenges.
While non-food desert urban areas have the lowest median percentage of people without vehicle access, food desert rural areas have the highest. This highlights the impact of food desert status on transportation barriers, irrespective of the urban or rural context.
After importing the dataset, food_access_research_atlas.csv, we identified the variables LILATracts_1And10 and LILATracts_1And20 that had the values, 0, 1 and we assigned value 1 as access and 0 as non-acces.
#dfCA = subset(colAdm)
dfFD = subset(data)
fooddesert_LILATracts_1And10_access = data.frame(dfFD$LILATracts_1And10 == 1)
print("access")
## [1] "access"
fooddesert_LILATracts_1And10_access = dfFD%>%
filter(LILATracts_1And10==1)
#colAdm_rejected = dfCA$admit == 0
#print("rejected")
#colAdm_rejected = dfCA%>%
#filter(admit==0)
fooddesert_LILATracts_1And10_nonaccess = data.frame(dfFD$LILATracts_1And10 == 0)
print("non-access")
## [1] "non-access"
fooddesert_LILATracts_1And10_nonaccess = dfFD%>%
filter(LILATracts_1And10==0)
fooddesert_LILATracts_1And20_access = data.frame(dfFD$LILATracts_1And20 == 1)
print("access")
## [1] "access"
fooddesert_LILATracts_1And20_access = dfFD%>%
filter(LILATracts_1And20==1)
fooddesert_LILATracts_1And20_nonaccess = data.frame(dfFD$LILATracts_1And20 == 0)
print("non-access")
## [1] "non-access"
fooddesert_LILATracts_1And20_nonaccess = dfFD%>%
filter(LILATracts_1And20==0)
Describe any interesting patterns revealed with this data visualization.
library(ggplot2)
library(tidyverse)
fg <- data %>%
count(LILATracts_1And10) %>%
mutate(
perc = round(proportions(n) * 100, 1),
res = str_c(n, "(", perc, ")%"),
LILATracts_1And10 = as.factor(LILATracts_1And10)
)
ggplot(fg, aes(LILATracts_1And10, n, fill = LILATracts_1And10)) +
geom_col() +
geom_text(aes(label = res), vjust = -0.5) + scale_fill_discrete(labels = c("Nonaccess", "Access"))
fg1 <- data %>%
count(LILATracts_1And20) %>%
mutate(
perc = round(proportions(n) * 100, 1),
res = str_c(n, "(", perc, ")%"),
LILATracts_1And20 = as.factor(LILATracts_1And20)
)
ggplot(fg1, aes(LILATracts_1And20, n, fill = LILATracts_1And20)) +
geom_col() +
geom_text(aes(label = res), vjust = -0.5) + scale_fill_discrete(labels = c("Nonaccess", "Access"))
Food Deserts impact in demographic groups of the society.
LILA_df <- data[data$LILATracts_1And10 == 1,]
LILA_Urban <- LILA_df[LILA_df$Urban == 1,]
LILA_Rural <- LILA_df[LILA_df$Urban == 0,]
Percentage_LILA_White <- ((sum(LILA_Urban$lawhite1) + sum(LILA_Rural$lawhite10))/sum(data$TractWhite))*100
Percentage_LILA_Black <- ((sum(LILA_Urban$lablack1) + sum(LILA_Rural$lablack10))/sum(data$TractBlack))*100
Percentage_LILA_Asian <- ((sum(LILA_Urban$laasian1) + sum(LILA_Rural$laasian10))/sum(data$TractAsian))*100
Percentage_LILA_Hisp <- ((sum(LILA_Urban$lahisp1) + sum(LILA_Rural$lahisp10))/sum(data$TractHispanic))*100
Percentage_LILA_Hopi <- ((sum(LILA_Urban$lanhopi1) + sum(LILA_Rural$lanhopi10))/sum(data$TractNHOPI))*100
Percentage_LILA_Multir <- ((sum(LILA_Urban$laomultir1) + sum(LILA_Rural$laomultir10))/sum(data$TractOMultir))*100
Percentage_LILA_Aian <- ((sum(LILA_Urban$laaian1) + sum(LILA_Rural$laaian10))/sum(data$TractAIAN))*100
data3 = data.frame()
data3 <- rbind(data3, Percentage_LILA_White)
data3 <- rbind(data3, Percentage_LILA_Black)
data3 <- rbind(data3, Percentage_LILA_Asian)
data3 <- rbind(data3, Percentage_LILA_Hisp)
data3 <- rbind(data3, Percentage_LILA_Hopi)
data3 <- rbind(data3, Percentage_LILA_Multir)
data3 <- rbind(data3, Percentage_LILA_Aian)
Groups = c("White", "Black", "Asian", "Hisp", "Hopi", "OMultir", "Aian")
data3 <- cbind(data3, Groups)
colnames(data3) <- c('Percentage', 'Groups')
bar_chart <- ggplot(data3, aes(x = Groups, y = Percentage)) +
geom_bar(stat = "identity", fill = "navy") +
labs(title = "Percentage of each demographic groups by Food deserts",
x = "Demographic Groups",
y = "Percentage") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=1))
print(bar_chart)
AIAN - American Indian or Alaska Native
HOPI - Native Hawaiian or Other Pacific Islander
OMultir - other/multiple race
HISP - Hispanic or Latino.
we can see from the chart that AIAN and Black neighborhoods are more likely to lack access to supermarkets than other groups.
And the least is Asian group.
This prompts the question of whether “food deserts” are a result of structural racial inequity.
LILA_df <- data[data$LILATracts_1And10 == 1,]
LILA_Urban <- LILA_df[LILA_df$Urban == 1,]
LILA_Rural <- LILA_df[LILA_df$Urban == 0,]
Percentage_LILA_kids <- ((sum(LILA_Urban$lakids1) + sum(LILA_Rural$lakids10))/sum(data$TractKids))*100
NonLILA_df <- data[data$LILATracts_1And10 == 0,]
NonLILA_Urban <- NonLILA_df[NonLILA_df$Urban == 1,]
NonLILA_Rural <- NonLILA_df[NonLILA_df$Urban == 0,]
Percentage_NonLILA_kids <- ((sum(NonLILA_Urban$lakids1) + sum(NonLILA_Rural$lakids10))/sum(data$TractKids))*100
data5 = data.frame()
data5 <- rbind(data5, Percentage_LILA_kids)
data5 <- rbind(data5, Percentage_NonLILA_kids)
LILA = c(1,0)
data5 <- cbind(data5, LILA)
colnames(data5) <- c('Percentage', 'LILA')
pie_chart <- ggplot(data5, aes(x = "", y = Percentage, fill = factor(LILA))) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y") +
labs(title = "Percentage of Kids in Food desert regions",
fill = "Food desert regions") +
scale_fill_manual(values = c("0" = "#5b84c4", "1" = "#F98125"), labels = c("0", "1")) +
theme_minimal() +
theme(legend.position = "bottom")
print(pie_chart)
Percentage_LILA_Seniors <- ((sum(LILA_Urban$laseniors1) + sum(LILA_Rural$laseniors10))/sum(data$TractSeniors))*100
Percentage_NonLILA_Seniors <- ((sum(NonLILA_Urban$laseniors1) + sum(NonLILA_Rural$laseniors10))/sum(data$TractSeniors))*100
data6 = data.frame()
data6 <- rbind(data6, Percentage_LILA_Seniors)
data6 <- rbind(data6, Percentage_NonLILA_Seniors)
LILA = c(1,0)
data6 <- cbind(data6, LILA)
colnames(data6) <- c('Percentage', 'LILA')
pie_chart <- ggplot(data6, aes(x = "", y = Percentage, fill = factor(LILA))) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y") +
labs(title = "Percentage of Seniors in Food desert regions",
fill = "Food desert regions") +
scale_fill_manual(values = c("0" = "#F54F52", "1" = "#93F03B"), labels = c("0", "1")) +
theme_minimal() +
theme(legend.position = "bottom")
print(pie_chart)
From these pie charts we can view that both kids and seniors live more in non-deserted regions.
Are they intentionally choosing neighborhoods near supermarkets which limits the opportunity for other groups to live better?
df2010 = data.frame(read.csv("Documents/Food_Deserts_in_US_2010.csv"))
knitr::kable(head(df2010, 5), format ='markdown')
| CensusTract | State | County | LILATracts_1And10 | LILATracts_halfAnd10 | LILATracts_1And20 | LILATracts_Vehicle | Urban | Rural | LA1and10 | LAhalfand10 | LA1and20 | LATracts_half | LATracts1 | LATracts10 | LATracts20 | LATractsVehicle_20 | HUNVFlag | GroupQuartersFlag | OHU2010 | NUMGQTRS | PCTGQTRS | LowIncomeTracts | POP2010 | UATYP10 | lapophalf | lapophalfshare | lalowihalf | lalowihalfshare | lakidshalf | lakidshalfshare | laseniorshalf | laseniorshalfshare | lahunvhalf | lahunvhalfshare | lapop1 | lapop1share | lalowi1 | lalowi1share | lakids1 | lakids1share | laseniors1 | laseniors1share | lahunv1 | lahunv1share | lapop10 | lapop10share | lalowi10 | lalowi10share | lakids10 | lakids10share | laseniors10 | laseniors10share | lahunv10 | lahunv10share | lapop20 | lapop20share | lalowi20 | lalowi20share | lakids20 | lakids20share | laseniors20 | laseniors20share | lahunv20 | lahunv20share | LILA_2010_POP | State_name |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3.6005e+10 | NY | Bronx | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3019 | 0 | 0 | 1 | 8731 | U | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | New York |
| 3.6005e+10 | NY | Bronx | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1824 | 0 | 0 | 1 | 5491 | U | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | New York |
| 3.6005e+10 | NY | Bronx | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1042 | 0 | 0 | 1 | 3113 | U | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | New York |
| 3.6005e+10 | NY | Bronx | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 513 | 0 | 0 | 1 | 1597 | U | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | New York |
| 3.6005e+10 | NY | Bronx | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1092 | 0 | 0 | 1 | 3413 | U | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | New York |
df2010$State_name <- as.factor(df2010$State_name)
LILA_df2010 <- df2010[df2010$LILATracts_1And10 == 1, c("POP2010", "State_name")]
LILA_df2015 <- data[data$LILATracts_1And10 == 1, c("POP2010", "State")]
colnames(LILA_df2015) <- c("POP2015", "State")
colnames(LILA_df2010) <- c("POP2010", "State")
In 2010, 37614936 of US Population lived in Food deserted tracts.
In 2015, 0 of US Population lived in Food deserted tracts.
Within 5 years, From 2010 to 2015, this population is
increased by 2 million. Which is actually a alarming
fact.
library(dplyr)
summed_df_2015 <- LILA_df2015 %>%
group_by(State) %>%
summarise(POP2015 = sum(POP2015))
summed_df_2010 <- LILA_df2010 %>%
group_by(State) %>%
summarise(POP2010 = sum(POP2010))
merged_df <- full_join(summed_df_2015, summed_df_2010, by = "State")
merged_df$PopulationDifference <- merged_df$POP2015 - merged_df$POP2010
# Create a bar chart
library(ggplot2)
p <- ggplot(merged_df, aes(x = State)) +
geom_bar(aes(y = POP2015, fill = "2015"), stat = "identity", position = "dodge") +
geom_bar(aes(y = POP2010, fill = "2010"), stat = "identity", position = "dodge") +
labs(y = "Population", fill = "Year") +
scale_fill_manual(values = c("2010" = "gold", "2015" = "black")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.2))
options(
repr.plot.width = 25, # Set the desired width in inches
repr.plot.height = 4 # Set the desired height in inches
)
print(p)
bar_chart <- ggplot(merged_df, aes(x = State, y = PopulationDifference)) +
geom_bar(stat = "identity", fill = "lightblue", width = 0.5) +
labs(title = "Population change in Food quarters by State",
x = "State",
y = "Percentage change") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.2))
options(
repr.plot.width = 25, # Set the desired width in inches
repr.plot.height = 4 # Set the desired height in inches
)
print(bar_chart)
Ohio, Georgia, and Florida have strong positive bars, indicating that the population rise is greater.
Few states, like Texas and Minnesota, have seen a decline in the population of areas that are food deserts over time
library(usmap)
merged_dff <- merged_df[merged_df$State != "District of Columbia",]
merged_dff <- merged_dff[order(merged_dff$State),]
map_df <- data.frame(
state = state.name,
diff = merged_dff$PopulationDifference)
plot_usmap(data = map_df, values = "diff", labels = FALSE)
The map shows that Eastern US states have experienced a greater population growth in food deserted areas than other US regions.